Setup R env. Load packages and set default image export formats, size and resolution.
knitr::opts_chunk$set(echo = TRUE,
fig.height = 8,
fig.width = 12,
dev = c("png", "pdf"),
dpi = 1000)
library(ggplot2)
library(ggtree)
## ggtree v3.8.2 For help: https://yulab-smu.top/treedata-book/
##
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
##
## Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam.
## ggtree: an R package for visualization and annotation of phylogenetic
## trees with their covariates and other associated data. Methods in
## Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
##
## S Xu, Z Dai, P Guo, X Fu, S Liu, L Zhou, W Tang, T Feng, M Chen, L
## Zhan, T Wu, E Hu, Y Jiang, X Bo, G Yu. ggtreeExtra: Compact
## visualization of richly annotated phylogenetic data. Molecular Biology
## and Evolution. 2021, 38(9):4039-4042. doi: 10.1093/molbev/msab166
##
## LG Wang, TTY Lam, S Xu, Z Dai, L Zhou, T Feng, P Guo, CW Dunn, BR
## Jones, T Bradley, H Zhu, Y Guan, Y Jiang, G Yu. treeio: an R package
## for phylogenetic tree input and output with richly annotated and
## associated data. Molecular Biology and Evolution. 2020, 37(2):599-603.
## doi: 10.1093/molbev/msz240
library(aplot)
library(treeio)
## treeio v1.25.3 For help: https://yulab-smu.top/treedata-book/
##
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
##
## LG Wang, TTY Lam, S Xu, Z Dai, L Zhou, T Feng, P Guo, CW Dunn, BR
## Jones, T Bradley, H Zhu, Y Guan, Y Jiang, G Yu. treeio: an R package
## for phylogenetic tree input and output with richly annotated and
## associated data. Molecular Biology and Evolution. 2020, 37(2):599-603.
## doi: 10.1093/molbev/msz240
##
## G Yu. Data Integration, Manipulation and Visualization of Phylogenetic
## Trees (1st ed.). Chapman and Hall/CRC. 2022. ISBN: 9781032233574
##
## S Xu, Z Dai, P Guo, X Fu, S Liu, L Zhou, W Tang, T Feng, M Chen, L
## Zhan, T Wu, E Hu, Y Jiang, X Bo, G Yu. ggtreeExtra: Compact
## visualization of richly annotated phylogenetic data. Molecular Biology
## and Evolution. 2021, 38(9):4039-4042. doi: 10.1093/molbev/msab166
library(tibble)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape2)
library(scales)
library(ggrepel)
library(stringr)
library(glue)
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:ggtree':
##
## rotate
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
## The following object is masked from 'package:ggtree':
##
## expand
library(aplot)
library(knitr)
options(scipen = 999) #Prevent scientific notation
Function to plot BUSCO categories - uses the standard BUSCO color scheme and also plots the percent for each category.
format_values_1 = function(values){
out <- c()
for(value in values) {
if(value != 0){
out <- c(out, sprintf(" %1.1f%%", value))
} else {
out <- c(out, "")
}
}
return(out)
}
plot_BUSCO_percentages = function(data, plot_title,
plot.axis.text.y=FALSE,
plot.legend=FALSE,
plot_size_ratio=1, plot_family="sans",
my_colors=c("#56B4E9", "#3492C7", "#F0E442", "#F04442") # Color pallet for BUSCO gene categories.
)
{
p <- data %>%
melt(id.var="SampleID") %>%
rename(BUSCO_categories = variable, Count = value) %>%
mutate(BUSCO_categories = factor(BUSCO_categories, c("Complete", "Single-copy", "Duplicated", "Fragmented", "Missing"))) %>%
filter(BUSCO_categories!="Complete") %>%
filter(BUSCO_categories!="Total") %>%
ggplot(aes(y = Count, x = SampleID, fill = BUSCO_categories)) +
geom_bar(position = position_stack(reverse = TRUE),
stat="identity") +
coord_flip() +
# geom_text_repel: https://ggrepel.slowkow.com/articles/examples.html
# geom_text_repel(aes(label = format_values_1(Count)), # Add two spaces in from of text so it centers off the number not number%
# size = rel(5)*plot_size_ratio,
# fontface = "bold",
# max.overlaps = Inf, #Set max.overlaps = Inf to override this behavior and always show all labels, regardless of too many overlaps.
# position = position_stack(vjust=0.5, reverse = TRUE),
# xlim = c(-Inf, Inf), # Set xlim or ylim to Inf or -Inf to disable repulsion away from the edges of the panel.
# ylim = c(-Inf, Inf),
# point.size = NA, #size of each point for each text label; Set point.size = NA to prevent label repulsion away from data points.
# force=0.5, # force of repulsion between overlapping text labels
# box.padding=0, # padding around the text label
# direction="y") + # move text labels “both” (default), “x”, or “y” directions
theme_gray(base_size = 8) +
scale_y_continuous(labels = c("0","20","40","60","80","100"),
breaks = c(0,20,40,60,80,100)) +
scale_fill_manual(values = my_colors,
labels = c("Complete (C) and single-copy (S)",
"Complete (C) and duplicated (D)",
"Fragmented (F)",
"Missing (M)")) +
ggtitle(plot_title) +
xlab("") +
ylab("%BUSCOs") +
theme(plot.title = element_text(family = plot_family,
hjust=0.5,
colour = "black",
size = rel(2.2)*plot_size_ratio,
face = "bold")) +
theme(plot.margin=unit(c(0, 0, 0, 0), "mm")) +
theme(panel.background = element_rect(color = "#FFFFFF", fill = "white")) +
theme(panel.grid.minor = element_blank()) +
theme(panel.grid.major = element_blank()) +
theme(axis.text.y = element_text(family = plot_family, face = "bold", size = rel(1.66)*plot_size_ratio, colour = "black")) +
theme(axis.text.x = element_text(family = plot_family, face = "bold", size = rel(1.66)*plot_size_ratio, colour = "black")) +
theme(axis.line = element_line(linewidth = 1*plot_size_ratio, colour = "black")) +
theme(axis.ticks.length = unit(0.85*plot_size_ratio, "cm")) +
theme(axis.ticks.y = element_line(colour = "white", linewidth = 0)) +
theme(axis.ticks.x = element_line(colour = "#222222")) +
theme(axis.ticks.length = unit(0.4*plot_size_ratio, "cm")) +
theme(axis.title.x = element_text(family = plot_family, size = rel(2)*plot_size_ratio)) +
theme(legend.position = "bottom", legend.title = element_blank()) +
theme(legend.text = element_text(family = plot_family, size = rel(1.2)*plot_size_ratio)) +
theme(legend.key.size = unit(1.5*plot_size_ratio,"line")) +
guides(fill = guide_legend(override.aes = list(colour = NULL))) +
guides(fill = guide_legend(nrow = 1, byrow = TRUE)) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm"))
if (!plot.axis.text.y) {
p <- p + theme(axis.text.y=element_blank())
}
if (!plot.legend) {
p <- p + theme(legend.position = "none")
}
return(p)
}
Function to plot basic single-category bar chart (not stacked). User provided color for bars.
format_values_2 = function(values){
out <- c()
for(value in values) {
if(value != 0){
out <- c(out, scales::comma(value))
} else {
out <- c(out, "")
}
}
return(out)
}
plot_barchart = function(data, column.id,
plot_title, xlab, bar.fill,
divide.by=1, y.log2=FALSE, y.log10=FALSE,
add.geom_text=TRUE, plot.axis.text.y=FALSE,
plot_size_ratio=1, plot_family="sans")
{
p <- data %>%
melt(id.var="SampleID") %>%
filter(variable==column.id) %>%
mutate(value = as.numeric(value)) %>%
mutate(value = value/divide.by) %>%
ggplot(aes(y = value, x = SampleID)) +
geom_bar(position = position_stack(),
stat="identity",
fill=bar.fill) +
scale_y_continuous(labels = comma) +
coord_flip() +
theme_gray(base_size = 8) +
ggtitle(plot_title) +
xlab("") +
ylab(xlab) +
theme(plot.title = element_text(family = plot_family,
hjust=0.5,
colour = "black",
size = rel(2.2)*plot_size_ratio,
face = "bold")) +
theme(plot.margin=unit(c(0, 0, 0, 0), "mm")) +
theme(legend.position="top",legend.title = element_blank()) +
theme(legend.text = element_text(family = plot_family, size = rel(1.2)*plot_size_ratio)) +
theme(legend.key.size = unit(1.5*plot_size_ratio,"line")) +
theme(panel.background = element_rect(color = "#FFFFFF", fill = "white")) +
theme(panel.grid.minor = element_blank()) +
theme(panel.grid.major = element_blank()) +
theme(axis.text.y = element_text(family = plot_family, face = "bold", size = rel(1.66)*plot_size_ratio, colour = "black")) +
theme(axis.text.x = element_text(family = plot_family, face = "bold", size = rel(1.66)*plot_size_ratio, colour = "black")) +
theme(axis.line = element_line(colour = "black", linewidth = 1*plot_size_ratio)) +
theme(axis.ticks.length = unit(0.85*plot_size_ratio, "cm")) +
theme(axis.ticks.y = element_line(colour = "white", linewidth = 0)) +
theme(axis.ticks.x = element_line(colour = "#222222")) +
theme(axis.ticks.length = unit(0.4*plot_size_ratio, "cm")) +
theme(axis.title.x = element_text(family = plot_family, size = rel(2)*plot_size_ratio)) +
guides(fill = guide_legend(override.aes = list(colour = NULL))) +
guides(fill = guide_legend(nrow = 1, byrow = TRUE)) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm"))
if (y.log10) {
p <- p + scale_y_continuous(trans="log10")
}
if (y.log2) {
p <- p + scale_y_continuous(trans="log2")
}
if (add.geom_text) {
p <- p + geom_text(aes(label=format_values_2(value), y=0), size=0.75, hjust='left')
}
if (!plot.axis.text.y) {
p <- p + theme(axis.text.y=element_blank())
}
return(p)
}
Function to generate a formatted taxa names from samples sheet.
# Format for plotting
# See https://yulab-smu.top/treedata-book/faq.html?q=rename#faq-formatting-label
format_sp_name = function(x){
n = x[["genera"]]
# Add species if present
if(x[["species"]] != ""){
n = paste(n, x[["species"]], sep="~")
}
# Either print with extra info (not italics) or not
if(x[["extra"]] != ""){
e = str_replace_all(x[["extra"]], " ", "~")
e = str_replace_all(e, ",", "")
lab = glue(paste("italic(",n,")~",e, sep=''))
} else {
lab = glue(paste("italic(",n,")", sep=''))
}
return(lab)
}
Function to plot tree + heatmap
get_colors <- function(seq.count.max){
color.list <- c("white", colorRampPalette(c("#ddf1da","#d53e4f"))(5))
for (i in 1:seq.count.max){
color.list <- c(color.list, "#d53e4f")
}
return(color.list[1:seq.count.max])
}
plot_tree_heatmap <- function(p,
OG.class, OG.seqs, OG.annots,
select.best_strata_taxa, select.designation,
select.best_strata_species_in_OG_proportion_total,
plot.heatmap.rownames=FALSE,
plot_size_ratio=0.2){
# Select OGs to plot
selected.OGs.class <- OG.class %>%
filter(best_strata_taxa == select.best_strata_taxa) %>%
filter(designation == select.designation) %>%
filter(best_strata_species_in_OG_proportion_total >= select.best_strata_species_in_OG_proportion_total) %>%
arrange(orthogroup_id)
selected.OGs <- selected.OGs.class %>%
select(orthogroup_id)
# Check that we have OGs to plot
if(length(selected.OGs$orthogroup_id) == 0){
print("No OGs found using the given filtering cutoffs. Returning nothing.")
return()
}
# Get sequence membership and count number of seqs per genome/transcriptome
selected.OGs.seqs <- OG.seqs %>%
filter(orthogroup_id %in% selected.OGs$orthogroup_id) %>%
arrange(orthogroup_id, species_id, sequence_id)
selected.OGs.seqs.counts <- selected.OGs.seqs %>%
dplyr::group_by(orthogroup_id, species_id) %>%
dplyr::summarise(n_seq = n(), .groups="keep") %>%
pivot_wider(names_from = species_id, values_from = n_seq, values_fill=0)
# Get max number of sequences per species (used for setting color range during plotting)
seq.count.max <- max(selected.OGs.seqs.counts %>% ungroup() %>% select(-orthogroup_id))
# Get sequence annotations
selected.OGs.annots <- merge(selected.OGs.seqs, OG.annots, by="sequence_id", sort=FALSE)
# Add missing genomes/transcriptomes to matrix - need to do since we are selecting OGs specific to taxonomic subset.
all.names <- samples$sample_id
not.in.matrix <- all.names[!all.names %in% colnames(selected.OGs.seqs.counts)]
for (n in not.in.matrix){
selected.OGs.seqs.counts[, n] <- 0
}
# Melt matrix into 3-columns
selected.OGs.seqs.counts <- melt(selected.OGs.seqs.counts, id.vars = "orthogroup_id") %>%
mutate(value = factor(value))
# Create additional column where zeros are NA (helpful during plotting since we can specifically set NA as white)
selected.OGs.seqs.counts$value_nas <- ifelse(selected.OGs.seqs.counts$value==0,NA, selected.OGs.seqs.counts$value)
scaling.factor <- min(c((20 / nrow(selected.OGs)), 1))
plot_family <- "sans"
plot_title <- paste(select.best_strata_taxa, select.designation, sep=' ')
hm <- ggplot(selected.OGs.seqs.counts, aes(x=orthogroup_id, y=variable)) +
geom_tile(aes(fill=cut(value_nas, breaks=0:seq.count.max, labels=0:(seq.count.max-1)))) +
scale_fill_manual(drop=FALSE, values=get_colors(seq.count.max), na.value="white", name="No. Genes") +
theme_minimal() + xlab(NULL) + ylab(NULL) + ggtitle(plot_title) +
theme(axis.text.x = element_text(family = plot_family,
face = "bold",
size = rel(scaling.factor * 4)*plot_size_ratio,
colour = "black",
angle = 90,
)
) +
theme(axis.text.y = element_text(family = plot_family,
face = "bold",
size = rel(1.66)*plot_size_ratio,
colour = "black",
)
) +
theme(plot.title = element_text(family = plot_family,
face = "bold",
size = rel(3)*plot_size_ratio,
colour = "black",
hjust = 0.5,
)
) +
theme(legend.text = element_text(family = plot_family, size = rel(1.2)*plot_size_ratio)) +
theme(legend.key.size = unit(1.5*plot_size_ratio,"line")) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm"))
# Remove heatmap row names?
if(!plot.heatmap.rownames){
hm <- hm + theme(axis.text.y = element_blank())
}
# Plot heatmap against tree
#p <- hm %>% insert_left(p, width=1)
# Return plot
return(hm)
}
Load the cleaned and processed datasets into R for plotting.
species.tree <- read.iqtree(
"../01_Orthofinder/Run2.SpeciesTree_rooted_node_labels.tre")
## Warning in sub("/.*", "", nlabel) %>% as.numeric: NAs introduced by coercion
## Warning in sub(".*/", "", nlabel) %>% as.numeric: NAs introduced by coercion
species.names <- as.phylo(species.tree)$tip.label
samples <- read.csv("../samples.txt", header=TRUE, sep='\t') %>%
filter(sample_id %in% species.names)
genome.stats <- read.table(
"../02_QC_stats_plot/all_genomes-01_stats-results.tsv", sep='\t',
header=TRUE, check.names=FALSE, stringsAsFactors=FALSE) %>%
filter(SampleID %in% species.names)
transcriptome.stats <- read.table(
"../02_QC_stats_plot/all_transcriptomes-01_stats-results.tsv", sep='\t',
header=TRUE, check.names=FALSE, stringsAsFactors=FALSE) %>%
filter(SampleID %in% species.names) %>%
rename(`Number of genes` = `Number of CDS`)
genomeData.busco.genome.eukaryota <- read.table(
"../02_QC_stats_plot/all_genomes-02_busco-genome.fa.busco_eukaryota_odb10-results.tsv", sep='\t',
header=TRUE, check.names=FALSE) %>%
filter(SampleID %in% species.names)
genomeData.busco.genome.metazoa <- read.table(
"../02_QC_stats_plot/all_genomes-02_busco-genome.fa.busco_metazoa_odb10-results.tsv", sep='\t',
header=TRUE, check.names=FALSE) %>%
filter(SampleID %in% species.names)
genomeData.busco.protein.eukaryota <- read.table(
"../02_QC_stats_plot/all_genomes-02_busco-pep.faa.busco_eukaryota_odb10-results.tsv", sep='\t',
header=TRUE, check.names=FALSE) %>%
filter(SampleID %in% species.names)
genomeData.busco.protein.metazoa <- read.table(
"../02_QC_stats_plot/all_genomes-02_busco-pep.faa.busco_metazoa_odb10-results.tsv", sep='\t',
header=TRUE, check.names=FALSE) %>%
filter(SampleID %in% species.names)
transcriptomeData.busco.protein.eukaryota <- read.table(
"../02_QC_stats_plot/all_transcriptomes-02_busco-pep.faa.busco_eukaryota_odb10-results.tsv", sep='\t',
header=TRUE, check.names=FALSE) %>%
filter(SampleID %in% species.names)
transcriptomeData.busco.protein.metazoa <- read.table(
"../02_QC_stats_plot/all_transcriptomes-02_busco-pep.faa.busco_metazoa_odb10-results.tsv", sep='\t',
header=TRUE, check.names=FALSE) %>%
filter(SampleID %in% species.names)
Merge transcriptome and genome datasets together so that all species from the tree are represented (species without this data get zero values).
# Genome/Transcriptome stats
seq.stats <- merge(genome.stats, transcriptome.stats, all=TRUE) %>% replace(is.na(.), 0)
# BUSCO results
busco.protein.eukaryota <- rbind(genomeData.busco.protein.eukaryota, transcriptomeData.busco.protein.eukaryota)
busco.protein.metazoa <- rbind(genomeData.busco.protein.metazoa, transcriptomeData.busco.protein.metazoa)
busco.genome.eukaryota <- data.frame(SampleID=species.names)
busco.genome.eukaryota <- merge(x=busco.genome.eukaryota, y=genomeData.busco.genome.eukaryota, all.x=TRUE) %>% replace(is.na(.), 0)
busco.genome.metazoa <- data.frame(SampleID=species.names)
busco.genome.metazoa <- merge(x=busco.genome.metazoa, y=genomeData.busco.genome.metazoa, all.x=TRUE) %>% replace(is.na(.), 0)
Load OG info.
data.class <- read.csv("../06_Final_Classifications/Orthogroups.Run2.classification.tsv.gz", header=TRUE, sep='\t')
data.seqs <- read.csv("../06_Final_Classifications/Orthogroups.Run2.long.tsv.gz", header=TRUE, sep='\t')
data.annots <- read.csv("../06_Final_Classifications/Orthogroups.Run2.sequences.nr.top_hits.tsv.gz", header=TRUE, sep='\t')
Global setting for plotting
plot_size_ratio <- 0.2
plot.axis.text.y <- FALSE
plot.legend <- FALSE
plot_family <- "sans"
Data.frame with clade positions in tree and colors.
clade.colors <- data.frame(
node = c(132, 138, 144, 146, 148, 164, 171, 172, 183, 179, 130, 126, 122, 124, 134, 136, 133, 56, 57),
fill = c("blue", "red", "brown", "pink", "yellow", "purple", "green", "orange", "blue", "red", "purple", "green", "yellow", "red", "blue", "blue", "yellow", "green", "blue"),
alpha = c(0.2, 0.4, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.5, 0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.4, 0.2),
n = c("Cnidaria", "Staurozoa", "Scyphozoa", "Cubozoa", "Hydrozoa", "Octocorallia", "Hexacorallia", "Actiniaria", "Scleractinia", "Corallimorpharia", "Placozoa", "Porifera", "Ctenophora", "Choanoflagellata", "Myxozoa", "Medusozoa", "Endocnidozoa", "Zoantharia", "Scleractinia")
)
Format species tree. Color clades and rename taxa into a pretty format (i.e., genera+species italics + extra info not italics).
samples$name <- apply(samples, 1, format_sp_name )
p <- ggtree(species.tree) %<+% samples +
geom_highlight(
data = clade.colors,
mapping = aes(
node=node,
fill=I(fill),
alpha=I(alpha),
),
extendto = 1.4,
to.bottom=TRUE,
) +
geom_cladelab(
data = clade.colors,
mapping = aes(
node=node,
label=n,
),
align = TRUE,
offset.text = 0.0,
hjust = "center",
fontsize = 2,
offset = 0.2,
barsize = 0,
fontface="bold",
) +
geom_tiplab(aes(label=name), size=6*plot_size_ratio, linesize=0.2, parse=TRUE, align=TRUE, offset=0.01) +
geom_tippoint(aes(color=data_type), size=6*plot_size_ratio, show.legend=FALSE) +
hexpand(.1)
#p
Count number of sequences in each designation per species.
data.seqs.class <- merge(data.seqs, data.class)
data.species.OGtype.count <- data.seqs.class %>%
select(orthogroup_id, species_id, sequence_id, designation) %>%
unique() %>%
group_by(species_id, designation) %>%
count()
Count number of sequences in each designation per species.
data.seqs.class %>%
select(orthogroup_id, species_id, sequence_id, designation) %>%
unique() %>%
group_by(designation) %>%
count()
## # A tibble: 4 × 2
## # Groups: designation [4]
## designation n
## <chr> <int>
## 1 Dark-Restricted 384502
## 2 Dark-Shared 51367
## 3 Light-Restricted 65872
## 4 Light-Shared 3276409
p.OGtype.absolute <- data.species.OGtype.count %>%
ggplot(aes(y = n, x = species_id)) +
geom_bar(aes(fill=designation),
position = position_stack(),
stat="identity") +
scale_fill_manual(values = c("#984ea3", "#bebada", "#ff7f00", "#fdbf6f")) +
scale_y_continuous(labels = comma) +
theme_gray(base_size = 8) +
ggtitle("No of genes") +
xlab("") +
ylab("No. of genes") +
theme(plot.title = element_text(family = plot_family,
hjust=0.5,
colour = "black",
size = rel(2.2)*plot_size_ratio,
face = "bold")) +
theme(plot.margin=unit(c(0, 0, 0, 0), "mm")) +
theme(legend.position="top",legend.title = element_blank()) +
theme(legend.text = element_text(family = plot_family, size = rel(1.2)*plot_size_ratio)) +
theme(legend.key.size = unit(1.5*plot_size_ratio,"line")) +
theme(panel.background = element_rect(color = "#FFFFFF", fill = "white")) +
theme(panel.grid.minor = element_blank()) +
theme(panel.grid.major = element_blank()) +
theme(axis.text.y = element_text(family = plot_family, face = "bold", size = rel(1.66)*plot_size_ratio, colour = "black")) +
theme(axis.text.x = element_text(family = plot_family, face = "bold", size = rel(1.66)*plot_size_ratio, colour = "black")) +
theme(axis.line = element_line(colour = "black", linewidth = 1*plot_size_ratio)) +
theme(axis.ticks.length = unit(0.85*plot_size_ratio, "cm")) +
theme(axis.ticks.y = element_line(colour = "white", linewidth = 0)) +
theme(axis.ticks.x = element_line(colour = "#222222")) +
theme(axis.ticks.length = unit(0.4*plot_size_ratio, "cm")) +
theme(axis.title.x = element_text(family = plot_family, size = rel(2)*plot_size_ratio)) +
guides(fill = guide_legend(override.aes = list(colour = NULL))) +
guides(fill = guide_legend(nrow = 1, byrow = TRUE)) +
#geom_text(aes(label = n), size=1.0, position = position_stack(vjust = 0.5, reverse = TRUE)) +
theme(axis.text.y=element_blank()) +
coord_flip() +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm"))
p.OGtype.relative <- data.species.OGtype.count %>%
ggplot(aes(y = n, x = species_id)) +
geom_bar(aes(fill=designation),
stat="identity",
position = 'fill') +
scale_fill_manual(values = c("#984ea3", "#bebada", "#ff7f00", "#fdbf6f")) +
scale_y_continuous(labels = comma) +
theme_gray(base_size = 8) +
ggtitle("Relative No. of genes") +
xlab("") +
ylab("Relative No. of genes") +
theme(plot.title = element_text(family = plot_family,
hjust=0.5,
colour = "black",
size = rel(2.2)*plot_size_ratio,
face = "bold")) +
theme(plot.margin=unit(c(0, 0, 0, 0), "mm")) +
theme(legend.position="top",legend.title = element_blank()) +
theme(legend.text = element_text(family = plot_family, size = rel(1.2)*plot_size_ratio)) +
theme(legend.key.size = unit(1.5*plot_size_ratio,"line")) +
theme(panel.background = element_rect(color = "#FFFFFF", fill = "white")) +
theme(panel.grid.minor = element_blank()) +
theme(panel.grid.major = element_blank()) +
theme(axis.text.y = element_text(family = plot_family, face = "bold", size = rel(1.66)*plot_size_ratio, colour = "black")) +
theme(axis.text.x = element_text(family = plot_family, face = "bold", size = rel(1.66)*plot_size_ratio, colour = "black")) +
theme(axis.line = element_line(colour = "black", linewidth = 1*plot_size_ratio)) +
theme(axis.ticks.length = unit(0.85*plot_size_ratio, "cm")) +
theme(axis.ticks.y = element_line(colour = "white", linewidth = 0)) +
theme(axis.ticks.x = element_line(colour = "#222222")) +
theme(axis.ticks.length = unit(0.4*plot_size_ratio, "cm")) +
theme(axis.title.x = element_text(family = plot_family, size = rel(2)*plot_size_ratio)) +
guides(fill = guide_legend(override.aes = list(colour = NULL))) +
guides(fill = guide_legend(nrow = 1, byrow = TRUE)) +
#geom_text(aes(label = n), size=1.0, position = position_stack(vjust = 0.5, reverse = TRUE)) +
theme(axis.text.y=element_blank()) +
coord_flip() +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm"))
p.gene.stats.count <- plot_barchart(data=seq.stats,
column.id="Number of genes",
plot_title="Protein-coding genes",
xlab="Number of genes",
bar.fill="#d9d9d9",
plot.axis.text.y=plot.axis.text.y,
plot_size_ratio=plot_size_ratio,
add.geom_text=FALSE)
p.busco.protein.eukaryota <- plot_BUSCO_percentages(
data=busco.protein.eukaryota,
plot_title="BUSCO Eukaryota - Protein",
plot.axis.text.y=plot.axis.text.y,
plot.legend=plot.legend,
plot_size_ratio=plot_size_ratio)
# Plot heatmap of species presence in each Scleractinia Dark-Shared OG.
p.ScleractiniaDarkshared <- plot_tree_heatmap(combined.plot, data.class, data.seqs, data.annots, "Scleractinia", "Dark-Shared", 50, plot_size_ratio=0.022)
# Plot heatmap of species presence in each Scleractinia Dark-Restricted OG.
p.ScleractiniaDarkRestricted <- plot_tree_heatmap(combined.plot, data.class, data.seqs, data.annots, "Scleractinia", "Dark-Restricted", 50, plot_size_ratio=0.025)
# Plot heatmap of species presence in each Hexacorallia Dark-Shared OG.
p.HexacoralliaDarkShared <- plot_tree_heatmap(combined.plot, data.class, data.seqs, data.annots, "Hexacorallia", "Dark-Shared", 50, plot_size_ratio=0.020)
# Plot heatmap of species presence in each Hexacorallia Dark-Restricted OG.
p.HexacoralliaDarkRestricted <- plot_tree_heatmap(combined.plot, data.class, data.seqs, data.annots, "Hexacorallia", "Dark-Restricted", 50, plot_size_ratio=0.200)
# Plot heatmap of species presence in each Cnidaria Dark-Shared OG.
p.CnidariaDarkShared <- plot_tree_heatmap(combined.plot, data.class, data.seqs, data.annots, "Cnidaria", "Dark-Shared", 50, plot_size_ratio=0.020)
# Plot heatmap of species presence in each Cnidaria Dark-Restricted OG.
p.CnidariaDarkRestricted <- plot_tree_heatmap(combined.plot, data.class, data.seqs, data.annots, "Cnidaria", "Dark-Restricted", 50, plot_size_ratio=0.032)
p.busco.protein.eukaryota %>%
insert_left(p, width=6) %>%
insert_right(p.gene.stats.count) %>%
insert_right(p.OGtype.absolute) %>%
insert_right(p.OGtype.relative) %>%
insert_right(p.CnidariaDarkRestricted, width = 1.00) %>%
insert_right(p.CnidariaDarkShared, width = 0.25) %>%
insert_right(p.HexacoralliaDarkRestricted, width = 6.00) %>%
insert_right(p.HexacoralliaDarkShared, width = 0.25) %>%
insert_right(p.ScleractiniaDarkRestricted, width = 0.70) %>%
insert_right(p.ScleractiniaDarkshared, width = 0.04)
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.45 tidyr_1.3.1 ggpubr_0.6.0 glue_1.7.0 stringr_1.5.1
## [6] ggrepel_0.9.5 scales_1.3.0 reshape2_1.4.4 dplyr_1.1.4 tibble_3.2.1
## [11] treeio_1.25.3 aplot_0.2.2 ggtree_3.8.2 ggplot2_3.4.4
##
## loaded via a namespace (and not attached):
## [1] yulab.utils_0.1.4 sass_0.4.8 utf8_1.2.4 generics_0.1.3
## [5] rstatix_0.7.2 ggplotify_0.1.2 stringi_1.8.3 lattice_0.22-5
## [9] digest_0.6.34 magrittr_2.0.3 evaluate_0.23 grid_4.3.1
## [13] fastmap_1.1.1 plyr_1.8.9 jsonlite_1.8.8 ape_5.7-1
## [17] backports_1.4.1 purrr_1.0.2 fansi_1.0.6 lazyeval_0.2.2
## [21] jquerylib_0.1.4 abind_1.4-5 cli_3.6.2 rlang_1.1.3
## [25] munsell_0.5.0 tidytree_0.4.6 withr_3.0.0 cachem_1.0.8
## [29] yaml_2.3.8 tools_4.3.1 parallel_4.3.1 ggsignif_0.6.4
## [33] memoise_2.0.1 colorspace_2.1-0 broom_1.0.5 vctrs_0.6.5
## [37] R6_2.5.1 gridGraphics_0.5-1 lifecycle_1.0.4 car_3.1-2
## [41] fs_1.6.3 ggfun_0.1.4 pkgconfig_2.0.3 pillar_1.9.0
## [45] bslib_0.6.1 gtable_0.3.4 Rcpp_1.0.12 highr_0.10
## [49] xfun_0.41 tidyselect_1.2.0 rstudioapi_0.15.0 farver_2.1.1
## [53] htmltools_0.5.7 nlme_3.1-164 patchwork_1.2.0 labeling_0.4.3
## [57] carData_3.0-5 rmarkdown_2.25 compiler_4.3.1